# -*- coding: utf-8 -*-
from mpi4py import MPI
from pybaram.backends.types import Kernel, MetaKernel
from pybaram.inifile import INIFile
from pybaram.integrators.base import BaseIntegrator
from pybaram.utils.misc import ProxyList
from pybaram.utils.np import eps
import numpy as np
import re
class BaseSteadyIntegrator(BaseIntegrator):
mode = 'steady'
impl_op = 'none'
def __init__(self, be, cfg, msh, soln, comm):
# get MPI_COMM_WORLD
self._comm = comm
# Get configurations for iterators
self.itermax = cfg.getint('solver-time-integrator', 'max-iter')
self.tol = cfg.getfloat('solver-time-integrator', 'tolerance')
# Set current iteration
if soln:
# Get current iteration from previous result
stats = INIFile()
stats.fromstr(soln['stats'])
self.iter = stats.getint('solver-time-integrator', 'iter', 0)
if self.iter > 0:
self.resid0 = np.array(stats.getlist(
'solver-time-integrator', 'resid0'))
else:
# Initialize iteration
self.iter = 0
# Indicator if solution is converted or not
self.isconv = False
super().__init__(be, cfg, msh, soln, comm)
# Get CFL number
self._cfl0 = cfg.getfloat('solver-time-integrator', 'cfl', 1.0)
# Get configuration of CFL linear ramp
self._cfl_iter0 = cfg.getint('solver-cfl-ramp', 'iter0', self.itermax)
self._cfl_itermax = cfg.getint('solver-cfl-ramp', 'max-iter', self.itermax)
self._cflmax = cfg.getfloat('solver-cfl-ramp', 'max-cfl', self._cfl0)
# Caculate increment of cfl for CFL ramp
if self._cfl_itermax > self._cfl_iter0:
self._dcfl = (self._cflmax - self._cfl0) / (self._cfl_itermax - self._cfl_iter0)
else:
self._dcfl = 0
# For turbulent simulation
if self.sys.name.startswith('rans'):
self._is_turb = True
# Get turbulent CFL factor
self._tcfl_fac = cfg.getfloat('solver-time-integrator', 'turb-cfl-factor', 1.0)
else:
self._is_turb = False
# Specify residual variable for monitoring
ele = next(iter(self.sys.eles))
self.conservars = conservars = ele.conservars
rvar = cfg.get('solver-time-integrator', 'res-var', 'rho')
self._res_idx = [i for i, e in enumerate(conservars) if e == rvar][0]
# Get total volume
voli = sum(self.sys.eles.tot_vol)
self.vol = comm.allreduce(voli, op=MPI.SUM)
# Construct kernels
self.cfg = cfg
self.construct_stages()
@property
def _cfl(self):
# Return CFL considering CFL ramp
if self.iter < self._cfl_iter0:
return self._cfl0
elif self.iter > self._cfl_itermax:
return self._cflmax
else:
return self._cfl0 + self._dcfl*(self.iter - self._cfl_iter0)
def run(self):
# Run integerator until max iteration
while self.iter < self.itermax:
# Compute one iteration
self.advance_to()
# Check if tolerance is satisfied
residual = (self.resid / self.resid0)
if residual[self._res_idx] < self.tol:
break
# Fire off plugins
self.isconv = True
self.completed_handler(self)
self.print_res(residual)
def complete_step(self, resid):
self.resid = resid
# Check if reference residual (resid0) is existed or not
if not hasattr(self, 'resid0'):
# Avoid zero in resid0
norm = self.cfg.get('solver-time-integrator', 'res-norm', 'True')
if norm.lower() == 'no' or norm.lower() == 'false':
self.resid0 = [1.0 for r in self.resid]
else:
self.resid0 = [r if r != 0 else eps for r in self.resid]
self.iter += 1
self.completed_handler(self)
def _make_stages(self, out, *args):
# Generate formulation of each RK stage
eq_str = '+'.join('{}*upts[{}][j, idx]'.format(a, i) for a, i in zip(args[::2], args[1::2]))
if self._is_turb:
# Substitute 'dt' string as dt array
eqf_str = re.sub('dt', 'dt[idx]', eq_str)
eqt_str = re.sub('dt', '{}*dt[idx]'.format(self._tcfl_fac), eq_str)
# Generate Python function for each RK stage
f_txt =(
f"def stage(i_begin, i_end, dt, *upts):\n"
f" for idx in range(i_begin, i_end):\n"
f" for j in range(nfvars):\n"
f" upts[{out}][j, idx] = {eqf_str}\n"
f" for j in range(nfvars, nvars):\n"
f" upts[{out}][j, idx] = {eqt_str}"
)
else:
# Substitute 'dt' string as dt array
eq_str = re.sub('dt', 'dt[idx]', eq_str)
# Generate Python function for each RK stage
f_txt =(
f"def stage(i_begin, i_end, dt, *upts):\n"
f" for idx in range(i_begin, i_end):\n"
f" for j in range(nvars):\n"
f" upts[{out}][j, idx] = {eq_str}"
)
kernels = []
for ele in self.sys.eles:
# Initiate Python function of RK stage for each element
gvars = {'nvars' : ele.nvars}
lvars = {}
exec(f_txt, gvars, lvars)
# Generate JIT kernel by looping RK stage function
_stage = self.be.make_loop(ele.neles, lvars['stage'])
kernels.append(Kernel(_stage, ele.dt, *ele.upts))
# Collect RK stage kernels for elements
return MetaKernel(kernels)
def _local_dt(self):
# Compute timestep of each cell using CFL
self.sys.timestep(self._cfl, self._curr_idx)
def advance_to(self):
# Compute dt
self._local_dt()
# Compute one RK step
self._curr_idx, resid = self.step()
# Post actions after iteration
self.complete_step(resid)
def rhs(self, idx_in=0, idx_out=1, is_norm=False):
# Compute right hand side
residi = self.sys.rhside(idx_in, idx_out, is_norm=is_norm)
# Compute L2 norm residual
if is_norm:
resid = self._comm.allreduce(residi, op=MPI.SUM)
return np.sqrt(resid) / self.vol
def print_res(self, residual):
# Print residual result
idx = self._res_idx
res = residual[idx]
if res < self.tol:
print("Converged : Residual of {} = {:05g} <= {:05g}".format(
self.conservars[idx], res, self.tol))
else:
print("Not converged : Residual of {} = {:05g} > {:05g}".format(
self.conservars[idx], res, self.tol))
class EulerExplicit(BaseSteadyIntegrator):
name = 'eulerexplicit'
nreg = 2
def construct_stages(self):
self._stages = [self._make_stages(0, 1, 0, 'dt', 1)]
def step(self):
stages = self._stages
resid = self.rhs(0, 1, is_norm=True)
stages[0]()
self.sys.post(0)
return 0, resid
class TVDRK3(BaseSteadyIntegrator):
name = 'tvd-rk3'
nreg = 3
def construct_stages(self):
self._stages = [
self._make_stages(2, 1, 0, 'dt', 1),
self._make_stages(2, 3/4, 0, 1/4, 2, 'dt/4', 1),
self._make_stages(0, 1/3, 0, 2/3, 2, '2*dt/3', 1),
]
def step(self):
stages = self._stages
self.rhs()
stages[0]()
self.sys.post(2)
self.rhs(2, 1)
stages[1]()
self.sys.post(2)
resid = self.rhs(2, 1, is_norm=True)
stages[2]()
self.sys.post(0)
return 0, resid
[docs]
class FiveStageRK(BaseSteadyIntegrator):
"""
Jameson Multistage scheme
ref : Blazek book 6.1.1 (Table 6.1)
"""
name = 'rk5'
nreg = 3
[docs]
def construct_stages(self):
self._stages = [
self._make_stages(2, 1, 0, '0.0533*dt', 1),
self._make_stages(2, 1, 0, '0.1263*dt', 1),
self._make_stages(2, 1, 0, '0.2375*dt', 1),
self._make_stages(2, 1, 0, '0.4414*dt', 1),
self._make_stages(0, 1, 0, 'dt', 1)
]
[docs]
def step(self):
stages = self._stages
self.rhs()
stages[0]
self.sys.post(2)
self.rhs(2, 1)
stages[1]()
self.sys.post(2)
self.rhs(2, 1)
stages[2]()
self.sys.post(2)
self.rhs(2, 1)
stages[3]()
self.sys.post(2)
resid = self.rhs(2, 1, is_norm=True)
stages[4]()
self.sys.post(0)
return 0, resid
[docs]
class LUSGS(BaseSteadyIntegrator):
name = 'lu-sgs'
nreg = 3
impl_op = 'spectral-radius'
[docs]
def construct_stages(self):
from pybaram.integrators.lusgs import make_lusgs_common, make_lusgs_update, make_serial_lusgs
be = self.be
# LU-SGS for each elements
for ele in self.sys.eles:
# Get reordering result
mapping, unmapping = ele.reordering()
# diagonal and lambda array
diag = np.empty(ele.neles)
# Get Python functions of flux and wave speed
_flux = ele.flux_container()
nv = (0, ele.nfvars)
# Compile LU-SGS functions
_update = make_lusgs_update(ele)
_pre_lusgs = make_lusgs_common(ele, factor=1.0)
_lsweep, _usweep = make_serial_lusgs(
be, ele, nv, mapping, unmapping, _flux
)
# Initiate LU-SGS kernel objects
pre_lusgs = Kernel(
be.make_loop(ele.neles, _pre_lusgs),
ele.dt, diag, ele.fspr
)
lsweeps = Kernel(be.make_loop(ele.neles, func=_lsweep),
ele.upts[0], ele.upts[1], ele.upts[2], diag, ele.dsrc, ele.fspr)
usweeps = Kernel(be.make_loop(ele.neles, func=_usweep),
ele.upts[0], ele.upts[1], ele.upts[2], diag, ele.dsrc, ele.fspr)
kernels = [pre_lusgs, lsweeps, usweeps]
# LU-SGS for turbulent variables
if self._is_turb:
# Get Python function of flux and wave speed for turbulent variables
_tflux = ele.tflux_container()
tnv = (ele.nfvars, ele.nvars)
# Compile LU-SGS functions for turbulent variables
_pre_tlusgs = make_lusgs_common(ele, factor=self._tcfl_fac)
_tlsweep, _tusweep = make_serial_lusgs(
be, ele, tnv, mapping, unmapping, _tflux
)
# Initiate LU-SGS kernel objects for turbulent variables
pre_tlusgs = Kernel(
be.make_loop(ele.neles, _pre_tlusgs),
ele.dt, diag, ele.tfspr
)
tlsweeps = Kernel(be.make_loop(ele.neles, func=_tlsweep),
ele.upts[0], ele.upts[1], ele.upts[2], diag, ele.dsrc, ele.tfspr)
tusweeps = Kernel(be.make_loop(ele.neles, func=_tusweep),
ele.upts[0], ele.upts[1], ele.upts[2], diag, ele.dsrc, ele.tfspr)
kernels += [pre_tlusgs, tlsweeps, tusweeps]
# Collect kernels and make meta kernels
ele.lusgs = MetaKernel(kernels)
# Update kernel
ele.update = Kernel(be.make_loop(ele.neles, _update),
ele.upts[0], ele.upts[1]
)
[docs]
def step(self):
resid = self.rhs(0, 1, is_norm=True)
self.sys.spec_rad()
self.sys.eles.lusgs()
self.sys.eles.update()
self.sys.post(0)
return 0, resid
[docs]
class ColoredLUSGS(BaseSteadyIntegrator):
name = 'colored-lu-sgs'
nreg = 3
impl_op = 'spectral-radius'
[docs]
def construct_stages(self):
from pybaram.integrators.lusgs import make_lusgs_common, make_lusgs_update, make_colored_lusgs
be = self.be
# colored-LU-SGS for each elements
for ele in self.sys.eles:
# Get Coloring result
ncolor, icolor, lev_color = ele.coloring()
# diagonal and lambda array
diag = np.empty(ele.neles)
# Get Python functions of flux and wave speed
_flux = ele.flux_container()
nv = (0, ele.nfvars)
# Compile LU-SGS functions
_update = make_lusgs_update(ele)
_pre_lusgs = make_lusgs_common(ele, factor=1.0)
_lsweep, _usweep = make_colored_lusgs(
be, ele, nv, icolor, lev_color, _flux
)
# Initiate LU-SGS kernel objects
pre_lusgs = Kernel(
be.make_loop(ele.neles, _pre_lusgs),
ele.dt, diag, ele.fspr
)
lsweeps = [
Kernel(be.make_loop(n0=n0, ne=ne, func=_lsweep),
ele.upts[0], ele.upts[1], ele.upts[2], diag, ele.dsrc, ele.fspr
)
for n0, ne in zip(ncolor[:-1], ncolor[1:])
]
usweeps = [
Kernel(be.make_loop(n0=n0, ne=ne, func=_usweep),
ele.upts[0], ele.upts[1], ele.upts[2], diag, ele.dsrc, ele.fspr
)
for n0, ne in zip(ncolor[::-1][1:], ncolor[::-1][:-1])
]
kernels = [pre_lusgs, *lsweeps, *usweeps]
# LU-SGS for turbulent variables
if self._is_turb:
# Get Python function of flux and wave speed for turbulent variables
_tflux = ele.tflux_container()
tnv = (ele.nfvars, ele.nvars)
# Compile LU-SGS functions for turbulent variables
_pre_tlusgs = make_lusgs_common(ele, factor=self._tcfl_fac)
_tlsweep, _tusweep = make_colored_lusgs(
be, ele, tnv, icolor, lev_color, _tflux
)
# Initiate LU-SGS kernel objects for turbulent variables
pre_tlusgs = Kernel(
be.make_loop(ele.neles, _pre_tlusgs),
ele.dt, diag, ele.tfspr
)
tlsweeps = [
Kernel(be.make_loop(n0=n0, ne=ne, func=_tlsweep),
ele.upts[0], ele.upts[1], ele.upts[2], diag, ele.dsrc, ele.tfspr
)
for n0, ne in zip(ncolor[:-1], ncolor[1:])
]
tusweeps = [
Kernel(be.make_loop(n0=n0, ne=ne, func=_tusweep),
ele.upts[0], ele.upts[1], ele.upts[2], diag, ele.dsrc, ele.tfspr
)
for n0, ne in zip(ncolor[::-1][1:], ncolor[::-1][:-1])
]
kernels += [pre_tlusgs, *tlsweeps, *tusweeps]
# Collect kernels and make meta kernels
ele.lusgs = MetaKernel(kernels)
# Update kernel
ele.update = Kernel(be.make_loop(ele.neles, _update),
ele.upts[0], ele.upts[1]
)
[docs]
def step(self):
resid = self.rhs(0, 1, is_norm=True)
self.sys.spec_rad()
self.sys.eles.lusgs()
self.sys.eles.update()
self.sys.post(0)
return 0, resid
[docs]
class BlockJacobi(BaseSteadyIntegrator):
name = 'jacobi'
nreg = 4
impl_op = 'approx-jacobian'
[docs]
def construct_stages(self):
from pybaram.integrators.jacobi import make_jacobi_update, make_jacobi_sweep, \
make_pre_jacobi, make_tpre_jacobi
# Constants for Jacobi method
self.subiter = self.cfg.getint('solver-time-integrator', 'sub-iter', 10)
self.subtol = self.cfg.getfloat('solver-time-integrator', 'sub-tol', 0.05)
be = self.be
for ele in self.sys.eles:
# Temporal arrays
diag = np.empty((ele.nfvars, ele.nfvars, ele.neles))
ele.subres = np.zeros((ele.neles,), dtype=np.float64)
nv = (0, ele.nfvars)
# Diagonal matrix computation kernel
_pre_jacobi = make_pre_jacobi(be, ele, nv)
pre_jacobi = Kernel(be.make_loop(ele.neles, _pre_jacobi),
ele.dt, diag, ele.jmat)
# Sweep and compute subiteration step solution
_sweep, _compute = make_jacobi_sweep(be, ele, nv)
sweep = Kernel(be.make_loop(ele.neles, _sweep),
ele.upts[1], ele.upts[2], ele.upts[3], ele.jmat)
compute = Kernel(be.make_loop(ele.neles, _compute),
ele.upts[2], ele.upts[3], diag)
main_kernels = [sweep, compute]
if self._is_turb:
tdiag = np.empty((ele.nturbvars, ele.nturbvars, ele.neles))
tnv = (ele.nfvars, ele.nvars)
_srcjacobian = ele.make_source_jacobian()
_pre_tjacobi = make_tpre_jacobi(be, ele, tnv, _srcjacobian, self._tcfl_fac)
pre_tjacobi = Kernel(be.make_loop(ele.neles, _pre_tjacobi),
ele.upts[0], ele.dt, tdiag, ele.tjmat, ele.dsrc)
# Turbulent sweeps and compute kernel
_tsweep, _tcompute = make_jacobi_sweep(be, ele, tnv)
tsweep = Kernel(be.make_loop(ele.neles, _tsweep),
ele.upts[1], ele.upts[2], ele.upts[3], ele.tjmat)
tcompute = Kernel(be.make_loop(ele.neles, _tcompute),
ele.upts[2], ele.upts[3], tdiag)
main_kernels += [tsweep, tcompute]
pre_kernels = [pre_jacobi, pre_tjacobi]
ele.pre_jacobi = MetaKernel(pre_kernels)
else:
ele.pre_jacobi = pre_jacobi
# Collect kernels and make meta kernels
ele.jacobi_sweep = MetaKernel(main_kernels)
# Update kernel
_update = make_jacobi_update(ele)
ele.update = Kernel(be.make_loop(ele.neles, _update),
ele.upts[0], ele.upts[2], ele.subres)
# Initialize dub array
ele.upts[2][:] = 0.0
[docs]
def step(self):
resid = self.rhs(0, 1, is_norm=True)
self.sys.approx_jac()
self.sys.eles.pre_jacobi()
subresid = 0.0
# Sub-iteration for Jacobi method
for it in range(self.subiter):
# Jacobi sweep
self.sys.eles.jacobi_sweep()
# Compute sub-residual from all elements
drhoi = 0.0
for ele in self.sys.eles:
ele.subres -= ele.upts[2][self._res_idx]
drhoi += sum(ele.subres**2)
ele.subres[:] = ele.upts[2][self._res_idx]
# Collect L2-norm for all domain
drho = self._comm.allreduce(drhoi, op=MPI.SUM)
drho = np.sqrt(drho)
if it == 0:
drho1 = drho
else:
subresid = drho/drho1
if subresid < self.subtol:
break
self.sys.eles.update()
self.sys.post(0)
self.subitnum = it
self.subres = subresid
return 0, resid
[docs]
class BlockLUSGS(BaseSteadyIntegrator):
name = 'blu-sgs'
nreg = 3
impl_op = 'approx-jacobian'
[docs]
def construct_stages(self):
from pybaram.integrators.blusgs import make_pre_blusgs, make_tpre_blusgs, \
make_serial_blusgs, make_blusgs_update
# Constants for Block LU-SGS subiteration
self.subiter = self.cfg.getint('solver-time-integrator', 'sub-iter', 10)
self.subtol = self.cfg.getfloat('solver-time-integrator', 'sub-tol', 0.1)
be = self.be
# Block LU-SGS method for each element
for ele in self.sys.eles:
# Get reordering result
mapping, unmapping = ele.reordering()
# Temporal array and matrix
diag = np.empty((ele.nfvars, ele.nfvars, ele.neles))
ele.subres = np.zeros((ele.neles,), dtype=np.float64)
nv = (0, ele.nfvars)
# Compile Block LU-SGS functions
_update = make_blusgs_update(ele)
_pre_blusgs = make_pre_blusgs(be, ele, nv)
_lower, _upper = make_serial_blusgs(be, ele, nv, mapping, unmapping)
# Initiate Block LU-SGS kernel objects
pre_blusgs = Kernel(be.make_loop(ele.neles, _pre_blusgs),
ele.dt, diag, ele.jmat)
# sweep kernels
lsweeps = Kernel(be.make_loop(ele.neles, func=_lower),
ele.upts[1], ele.upts[2], diag, ele.jmat)
usweeps = Kernel(be.make_loop(ele.neles, func=_upper),
ele.upts[1], ele.upts[2], diag, ele.jmat)
sweep_kernels = [lsweeps, usweeps]
# Block LU-SGS for turbulent model
if self._is_turb:
tdiag = np.empty((ele.nturbvars, ele.nturbvars, ele.neles))
tnv = (ele.nfvars, ele.nvars)
_srcjacobian = ele.make_source_jacobian()
_pre_tblusgs = make_tpre_blusgs(be, ele, tnv, _srcjacobian, self._tcfl_fac)
pre_tblusgs = Kernel(be.make_loop(ele.neles, _pre_tblusgs),
ele.upts[0], ele.dt, tdiag, ele.tjmat, ele.dsrc)
_tlsweep, _tusweep = make_serial_blusgs(be, ele, tnv, mapping, unmapping)
tlsweep = Kernel(be.make_loop(ele.neles, _tlsweep),
ele.upts[1], ele.upts[2], tdiag, ele.tjmat)
tusweep = Kernel(be.make_loop(ele.neles, _tusweep),
ele.upts[1], ele.upts[2], tdiag, ele.tjmat)
sweep_kernels += [tlsweep, tusweep]
pre_kernels = [pre_blusgs, pre_tblusgs]
ele.pre_blusgs = MetaKernel(pre_kernels)
else:
ele.pre_blusgs = pre_blusgs
# Collect all kernels
ele.blusgs_sweep = MetaKernel(sweep_kernels)
# Update kernel
ele.update = Kernel(be.make_loop(ele.neles, _update),
ele.upts[0], ele.upts[2], ele.subres)
# Initialize dub array
ele.upts[2][:] = 0.0
[docs]
def step(self):
resid = self.rhs(0, 1, is_norm=True)
self.sys.approx_jac()
# Compute diagonal matrix
self.sys.eles.pre_blusgs()
subresid = 0.0
# Subiteration for Block LU-SGS
for it in range(self.subiter):
# Block LU-SGS sweep
self.sys.eles.blusgs_sweep()
# Compute sub-residual from all elements
drhoi = 0.0
for ele in self.sys.eles:
ele.subres -= ele.upts[2][self._res_idx]
drhoi += sum(ele.subres**2)
ele.subres[:] = ele.upts[2][self._res_idx]
# Collect L2-norm for all domain
drho = self._comm.allreduce(drhoi, op=MPI.SUM)
drho = np.sqrt(drho)
# Check sub-convergence
if it == 0:
drho1 = drho
else:
subresid = drho/drho1
if subresid < self.subtol:
break
self.sys.eles.update()
self.sys.post(0)
self.subitnum = it
self.subres = subresid
return 0, resid
[docs]
class ColoredBlockLUSGS(BaseSteadyIntegrator):
name = 'colored-blu-sgs'
nreg = 3
impl_op = 'approx-jacobian'
[docs]
def construct_stages(self):
from pybaram.integrators.blusgs import make_pre_blusgs, make_tpre_blusgs, \
make_colored_blusgs, make_blusgs_update
# Constants for Block LU-SGS subiteration
self.subiter = self.cfg.getint('solver-time-integrator', 'sub-iter', 10)
self.subtol = self.cfg.getfloat('solver-time-integrator', 'sub-tol', 0.1)
be = self.be
# Colored Block LU-SGS for each elements
for ele in self.sys.eles:
# Get coloring result
ncolor, icolor, lev_color = ele.coloring()
# Temporal array and matrix
diag = np.empty((ele.nfvars, ele.nfvars, ele.neles))
ele.subres = np.zeros((ele.neles,), dtype=np.float64)
nv = (0, ele.nfvars)
# Compile Block LU-SGS functions
_update = make_blusgs_update(ele)
_pre_blusgs = make_pre_blusgs(be, ele, nv)
_sweep = make_colored_blusgs(be, ele, nv, icolor, lev_color)
# Initiate Block LU-SGS kernel objects
pre_blusgs = Kernel(
be.make_loop(ele.neles, _pre_blusgs),
ele.dt, diag, ele.jmat
)
lsweeps = [
Kernel(be.make_loop(n0=n0, ne=ne, func=_sweep),
ele.upts[1], ele.upts[2], diag, ele.jmat)
for n0, ne in zip(ncolor[:-1], ncolor[1:])
]
usweeps = [
Kernel(be.make_loop(n0=n0, ne=ne, func=_sweep),
ele.upts[1], ele.upts[2], diag, ele.jmat)
for n0, ne in zip(ncolor[::-1][1:], ncolor[::-1][:-1])
]
sweep_kernels = [*lsweeps, *usweeps]
# Colored Block LU-SGS for turbulence model
if self._is_turb:
# digonal matrix for turbulence model
tdiag = np.empty((ele.nturbvars, ele.nturbvars, ele.neles))
tnv = (ele.nfvars, ele.nvars)
# Source term Jacobian
_srcjacobian = ele.make_source_jacobian()
_pre_tblusgs = make_tpre_blusgs(be, ele, tnv, _srcjacobian, self._tcfl_fac)
pre_tblusgs = Kernel(be.make_loop(ele.neles, _pre_tblusgs),
ele.upts[0], ele.dt, tdiag, ele.tjmat, ele.dsrc)
_tsweep = make_colored_blusgs(be, ele, tnv, icolor, lev_color)
tlsweeps = [
Kernel(be.make_loop(n0=n0, ne=ne, func=_tsweep),
ele.upts[1], ele.upts[2], tdiag, ele.tjmat)
for n0, ne in zip(ncolor[:-1], ncolor[1:])
]
tusweeps = [
Kernel(be.make_loop(n0=n0, ne=ne, func=_tsweep),
ele.upts[1], ele.upts[2], tdiag, ele.tjmat)
for n0, ne in zip(ncolor[::-1][1:], ncolor[::-1][:-1])
]
sweep_kernels += [*tlsweeps, *tusweeps]
pre_kernels = [pre_blusgs, pre_tblusgs]
ele.pre_blusgs = MetaKernel(pre_kernels)
else:
ele.pre_blusgs = pre_blusgs
# Collect all kernels
ele.blusgs_sweep = MetaKernel(sweep_kernels)
# Update kernel
ele.update = Kernel(be.make_loop(ele.neles, _update),
ele.upts[0], ele.upts[2], ele.subres)
# Initialize dub array
ele.upts[2][:] = 0.0
[docs]
def step(self):
resid = self.rhs(0, 1, is_norm=True)
self.sys.approx_jac()
# Compute diagonal matrix
self.sys.eles.pre_blusgs()
subresid = 0.0
# Subiteration for Block LU-SGS
for it in range(self.subiter):
# Block LU-SGS sweep
self.sys.eles.blusgs_sweep()
# Compute sub-residual from all elements
drhoi = 0.0
for ele in self.sys.eles:
ele.subres -= ele.upts[2][self._res_idx]
drhoi += sum(ele.subres**2)
ele.subres[:] = ele.upts[2][self._res_idx]
# Collect L2-norm for all domain
drho = self._comm.allreduce(drhoi, op=MPI.SUM)
drho = np.sqrt(drho)
# Check sub-convergence
if it == 0:
drho1 = drho
else:
subresid = drho/drho1
if subresid < self.subtol:
break
self.sys.eles.update()
self.sys.post(0)
self.subitnum = it
self.subres = subresid
return 0, resid